# Kalmia analyze fruit size at end of season
# Using fruit sizes calculated from image segmentation in Python with opencv
#  Callin Switzer
# 20 October 2016
# 10 Feb 2017 Update: Conducted Statistical Modeling with LMER and GLMER
# 11 March 2017 Update: added random effect to MER models to account for plant lineage

Read in data

ipak <- function(pkg){
     new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
     if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
     sapply(pkg, require, character.only = TRUE)
}

packages <- c("ggplot2", 'lme4', 'plyr', 'influence.ME', 'sjPlot', 'multcomp', 'lsmeans', 'Matrix')
ipak(packages)
##      ggplot2         lme4         plyr influence.ME       sjPlot 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##     multcomp      lsmeans       Matrix 
##         TRUE         TRUE         TRUE
setwd("/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/DatasetsSupplemental/")

theme_set(theme_classic())

kfrt <- read.csv("kalmiaFruitFinal.csv", stringsAsFactors = FALSE)
nrow(kfrt[kfrt$dia_mm == 20.0,]) # number of images taken
## [1] 92
# clean and process data
kfrt <- kfrt[kfrt$dia_mm != 20.0, ] # 20 is the reference object in segmented images
nrow(kfrt) # number of total fruits measured
## [1] 1305
# label treatmens and access numbers
kfrt$trt <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][2])
kfrt$accessNum <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][1])

kfrt$trt <- mapvalues(kfrt$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))
# add plant lineage (all plants that start with 1129 are from the lineage, 673_69 )
plantInds <- 1:nrow(kfrt) %in% grep(pattern = "1129", x = kfrt$plantNum)
kfrt$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
     ifelse(plantInds[x], "673_69", paste(strsplit(kfrt$plantNum[x], split = "_")[[1]][1:2], collapse = "_") )})

Add plants with 0 count to the dataset

# count number of fruits
counts = as.data.frame(table(kfrt$plantNum))
counts$trt = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][2])

counts$trt <- mapvalues(counts$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))


counts$accessNum = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][1])


# add in the trts and accession numbers that had counts of 0
# read in error-checked datasheet
kalNotes <- read.csv("KalmiaDailyDatasheets_ErrorChecked.csv")

# get the accession numbers I should have
accNumsHave <- unique(kalNotes$Plant.Number)
accNumsHave <- accNumsHave[!(accNumsHave %in% c("", "Plant Number"))]

#change formatting to match above
accNumsHave <- gsub("#", "", accNumsHave)
accNumsHave <- gsub("\ ", "_", accNumsHave)
accNumsHave <- gsub("\\-", "_", accNumsHave)
accNumsHave <- gsub("\\*", "_", accNumsHave)
accNumsHave <-toupper(accNumsHave)

shouldHave <- as.data.frame(t(sapply(accNumsHave, function(x) as.character(paste(x, c(1:4), sep = "__")))))
row.names(shouldHave) <- NULL
shouldHave1 <- c(as.character(shouldHave[, 1]), 
                 as.character(shouldHave[, 2]), 
                 as.character(shouldHave[, 3]), 
                 as.character(shouldHave[, 4]))

# find missing ones -- this is the ones that
# are in the daily datasheet that aren't in the fruit measurements
missingTrts <- setdiff(shouldHave1,unique(kfrt$plantNum[kfrt$trt != "5"]))

# this should be 0 -- the ones from the fruit measurements that aren't in the daily datasheet
setdiff(unique(kfrt$plantNum[kfrt$trt != "Unbagged_2"]), shouldHave1)
## character(0)
# here's the list of plants that had their bags/tags go missing during the experiment (meaning that
# I was unable to collect fruits)
# note: "677_66_MASS__1" was the only plant that was missing a tag during the final collection
# that wasn't missing in one of my previous checks. 
NAList <- c("1129_74_E__4", "1129_74_C__4", "39_60_A__3", "685_70_A__2", "677_66_MASS__1")

# note: this ignores trt#5 which is the same as #3
ZeroFruitPlants <- missingTrts[!(missingTrts %in% NAList)]

zfdf <- data.frame(Var1 = ZeroFruitPlants, 
                   Freq = 0, 
                   trt = sapply(X = 1:length(ZeroFruitPlants), 
                                FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]), 
                                                           split = "__")[[1]][2]),
                   accessNum = sapply(X = 1:length(ZeroFruitPlants), 
                                            FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]), 
                                                                       split = "__")[[1]][1])
                   )

zfdf$trt <- mapvalues(zfdf$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"))
## The following `from` values were not present in `x`: 3, 4, 5
countFin <- rbind(counts, zfdf)


# final fruit counts for the fruits collected at the end of the experiment
countFin <- countFin[order(countFin$accessNum, countFin$trt, decreasing = FALSE), ]

# change labels from unbagged to conntrol
countFin$trt <- mapvalues(countFin$trt, c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2")
                          , c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"))
# change levels, so that control is first
countFin$trt <- factor(countFin$trt, levels = c("Control","Control_2", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"))

plantInds <- 1:nrow(countFin) %in% grep(pattern = "1129", x = countFin$accessNum)
countFin$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
     ifelse(plantInds[x], "673_69", paste(strsplit(countFin$accessNum[x], split = "_")[[1]][1:2], collapse = "_") )})

Visualize counts of fruits

ggplot(countFin, aes(x = trt , y = Freq, fill = trt)) + 
     geom_boxplot() +
    # geom_violin()+
     
     labs(y = "Number of fruits", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

saveDir <- "/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/Media/"
ggsave(paste0(saveDir, "KalmiaFruitNumber.pdf"), width = 10, height = 8)

Visualize counts of fruits (ignore treatment #5)

ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) + 
     geom_boxplot() +
     # geom_point(aes(color = plantLineage)) + 
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
           legend.position = 'none') + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitNumber_trt1_4Only.pdf"), width = 5, height = 4)

Use GLMM’s to find differences in number of fruits

First, summarize the dataset that I will be using

# I'm ignoring control#2, because it was originally intended for another experiment (analysis wasn't planned)
cf1 <- countFin[countFin$trt != "Control_2",]
nrow(countFin) # number of total counts, including Control2
## [1] 104
sum(cf1$Freq) # total number of fruits in the analysis of count
## [1] 1035
# Number of counts, excluding control_2
# this is different from the number of photos
# because I didn't take photos on 0-count plants
nrow(cf1) 
## [1] 84
data.frame(table(cf1$trt)) # number of plants in each treatment that we are analyzing
##                    Var1 Freq
## 1               Control   21
## 2             Control_2    0
## 3                Bagged   22
## 4       Bagged & Selfed   21
## 5 Unbagged & Outcrossed   20
data.frame(table(cf1$accessNum)) # shows number of counts per plant -- 4 means that we counted all four treatments
##           Var1 Freq
## 1    1129_74_A    4
## 2    1129_74_B    4
## 3    1129_74_C    3
## 4    1129_74_E    3
## 5    1129_74_F    4
## 6    12_2006_A    4
## 7  128_96_MASS    4
## 8  132_98_MASS    4
## 9     150_58_A    4
## 10    232_46_A    4
## 11     35_40_C    4
## 12     39_60_A    3
## 13    425_74_D    4
## 14    440_66_A    4
## 15     46_18_A    4
## 16 572_64_MASS    4
## 17    624_64_B    4
## 18 667_66_MASS    4
## 19    673_69_B    4
## 20    685_70_A    3
## 21    721_79_B    4
## 22 UNLABELED_1    4
# shows which treatments / plants are missing from analysis
# this is different from the plants that just had 0 counts
data.frame(table(interaction(cf1$accessNum , cf1$trt))) 
##                                  Var1 Freq
## 1                   1129_74_A.Control    1
## 2                   1129_74_B.Control    1
## 3                   1129_74_C.Control    1
## 4                   1129_74_E.Control    1
## 5                   1129_74_F.Control    1
## 6                   12_2006_A.Control    1
## 7                 128_96_MASS.Control    1
## 8                 132_98_MASS.Control    1
## 9                    150_58_A.Control    1
## 10                   232_46_A.Control    1
## 11                    35_40_C.Control    1
## 12                    39_60_A.Control    0
## 13                   425_74_D.Control    1
## 14                   440_66_A.Control    1
## 15                    46_18_A.Control    1
## 16                572_64_MASS.Control    1
## 17                   624_64_B.Control    1
## 18                667_66_MASS.Control    1
## 19                   673_69_B.Control    1
## 20                   685_70_A.Control    1
## 21                   721_79_B.Control    1
## 22                UNLABELED_1.Control    1
## 23                1129_74_A.Control_2    0
## 24                1129_74_B.Control_2    0
## 25                1129_74_C.Control_2    0
## 26                1129_74_E.Control_2    0
## 27                1129_74_F.Control_2    0
## 28                12_2006_A.Control_2    0
## 29              128_96_MASS.Control_2    0
## 30              132_98_MASS.Control_2    0
## 31                 150_58_A.Control_2    0
## 32                 232_46_A.Control_2    0
## 33                  35_40_C.Control_2    0
## 34                  39_60_A.Control_2    0
## 35                 425_74_D.Control_2    0
## 36                 440_66_A.Control_2    0
## 37                  46_18_A.Control_2    0
## 38              572_64_MASS.Control_2    0
## 39                 624_64_B.Control_2    0
## 40              667_66_MASS.Control_2    0
## 41                 673_69_B.Control_2    0
## 42                 685_70_A.Control_2    0
## 43                 721_79_B.Control_2    0
## 44              UNLABELED_1.Control_2    0
## 45                   1129_74_A.Bagged    1
## 46                   1129_74_B.Bagged    1
## 47                   1129_74_C.Bagged    1
## 48                   1129_74_E.Bagged    1
## 49                   1129_74_F.Bagged    1
## 50                   12_2006_A.Bagged    1
## 51                 128_96_MASS.Bagged    1
## 52                 132_98_MASS.Bagged    1
## 53                    150_58_A.Bagged    1
## 54                    232_46_A.Bagged    1
## 55                     35_40_C.Bagged    1
## 56                     39_60_A.Bagged    1
## 57                    425_74_D.Bagged    1
## 58                    440_66_A.Bagged    1
## 59                     46_18_A.Bagged    1
## 60                 572_64_MASS.Bagged    1
## 61                    624_64_B.Bagged    1
## 62                 667_66_MASS.Bagged    1
## 63                    673_69_B.Bagged    1
## 64                    685_70_A.Bagged    1
## 65                    721_79_B.Bagged    1
## 66                 UNLABELED_1.Bagged    1
## 67          1129_74_A.Bagged & Selfed    1
## 68          1129_74_B.Bagged & Selfed    1
## 69          1129_74_C.Bagged & Selfed    1
## 70          1129_74_E.Bagged & Selfed    1
## 71          1129_74_F.Bagged & Selfed    1
## 72          12_2006_A.Bagged & Selfed    1
## 73        128_96_MASS.Bagged & Selfed    1
## 74        132_98_MASS.Bagged & Selfed    1
## 75           150_58_A.Bagged & Selfed    1
## 76           232_46_A.Bagged & Selfed    1
## 77            35_40_C.Bagged & Selfed    1
## 78            39_60_A.Bagged & Selfed    1
## 79           425_74_D.Bagged & Selfed    1
## 80           440_66_A.Bagged & Selfed    1
## 81            46_18_A.Bagged & Selfed    1
## 82        572_64_MASS.Bagged & Selfed    1
## 83           624_64_B.Bagged & Selfed    1
## 84        667_66_MASS.Bagged & Selfed    1
## 85           673_69_B.Bagged & Selfed    1
## 86           685_70_A.Bagged & Selfed    0
## 87           721_79_B.Bagged & Selfed    1
## 88        UNLABELED_1.Bagged & Selfed    1
## 89    1129_74_A.Unbagged & Outcrossed    1
## 90    1129_74_B.Unbagged & Outcrossed    1
## 91    1129_74_C.Unbagged & Outcrossed    0
## 92    1129_74_E.Unbagged & Outcrossed    0
## 93    1129_74_F.Unbagged & Outcrossed    1
## 94    12_2006_A.Unbagged & Outcrossed    1
## 95  128_96_MASS.Unbagged & Outcrossed    1
## 96  132_98_MASS.Unbagged & Outcrossed    1
## 97     150_58_A.Unbagged & Outcrossed    1
## 98     232_46_A.Unbagged & Outcrossed    1
## 99      35_40_C.Unbagged & Outcrossed    1
## 100     39_60_A.Unbagged & Outcrossed    1
## 101    425_74_D.Unbagged & Outcrossed    1
## 102    440_66_A.Unbagged & Outcrossed    1
## 103     46_18_A.Unbagged & Outcrossed    1
## 104 572_64_MASS.Unbagged & Outcrossed    1
## 105    624_64_B.Unbagged & Outcrossed    1
## 106 667_66_MASS.Unbagged & Outcrossed    1
## 107    673_69_B.Unbagged & Outcrossed    1
## 108    685_70_A.Unbagged & Outcrossed    1
## 109    721_79_B.Unbagged & Outcrossed    1
## 110 UNLABELED_1.Unbagged & Outcrossed    1
# create "average plant" for the plants that are cuttings of 673_64

avgClf <- tapply(X = cf1$Freq, INDEX = interaction(cf1$plantLineage, droplevels(cf1$trt)), mean, na.rm = TRUE)
cts <- tapply(X = cf1$Freq, INDEX = interaction(cf1$plantLineage, droplevels(cf1$trt)), length)

trts <- do.call(rbind, strsplit(rownames(avgClf), split = "\\."))


avgDF <- data.frame(cbind( avgClf, trts, cts))
colnames(avgDF) <- c("avgFreq", "plantLineage", "trt", "count")

avgDF$avgFreq <- round(as.numeric(as.character(avgDF$avgFreq)))

avgDF <- avgDF[order(avgDF$plantLineage, avgDF$trt), ]

avgDF$trt <- relevel(as.factor(avgDF$trt), ref = "Control")
avgDF <- na.omit(avgDF)



cf1 <- cf1[order(cf1$plantLineage, as.character(cf1$trt)), ]

acn <- character()
for(ii in 1:nrow(avgDF)){
     plLin <- as.character(avgDF$plantLineage[ii])
     acn[ii] = cf1$accessNum[grep(plLin,  cf1$accessNum)[1]]
}
avgDF$accessNum <- acn

avgDF$avgFreq
##  [1]  2  5  3 12  9 12  8 35  0  0  6 11 13  7 11  8  0  4  1  8  8 27 18
## [24] 17  6 16 15 14  0 18 62  1 19 15 18  2  0  3 14  1  0  4 26  0  5 17
## [47] 21  0 34 13 22  6 25 17 32  0  9 14  0  0  1  9  1  4  6  7
ggplot(avgDF, aes(x = trt , y = avgFreq, fill = trt)) + 
     geom_boxplot() + 
     geom_point(aes(color = plantLineage == "673_69"))

ggplot(avgDF, aes(x = trt , y = avgFreq, fill = trt)) + 
     geom_boxplot() +
     geom_point(aes(color = plantLineage == "673_69"), position = position_jitter(width = 0.1, height = 0)) + 
     labs(y = "Number of fruits", x = "Treatment") + 
     ylim(c(0, 70)) + 
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
           legend.position = 'none') + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) + 
     geom_boxplot() +
     geom_point(aes(color = plantLineage == "673_69"), position = position_jitter(width = 0.1, height = 0)) + 
     ylim(c(0, 70)) + 
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
           legend.position = 'none') + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

nrow(avgDF)
## [1] 66
avgDF$count
##                    12_2006.Bagged           12_2006.Bagged & Selfed 
##                                 1                                 1 
##                   12_2006.Control     12_2006.Unbagged & Outcrossed 
##                                 1                                 1 
##                     128_96.Bagged            128_96.Bagged & Selfed 
##                                 1                                 1 
##                    128_96.Control      128_96.Unbagged & Outcrossed 
##                                 1                                 1 
##                     132_98.Bagged            132_98.Bagged & Selfed 
##                                 1                                 1 
##                    132_98.Control      132_98.Unbagged & Outcrossed 
##                                 1                                 1 
##                     150_58.Bagged            150_58.Bagged & Selfed 
##                                 1                                 1 
##                    150_58.Control      150_58.Unbagged & Outcrossed 
##                                 1                                 1 
##                     232_46.Bagged            232_46.Bagged & Selfed 
##                                 1                                 1 
##                    232_46.Control      232_46.Unbagged & Outcrossed 
##                                 1                                 1 
##                      35_40.Bagged             35_40.Bagged & Selfed 
##                                 1                                 1 
##                     35_40.Control       35_40.Unbagged & Outcrossed 
##                                 1                                 1 
##                      39_60.Bagged             39_60.Bagged & Selfed 
##                                 1                                 1 
##       39_60.Unbagged & Outcrossed                     425_74.Bagged 
##                                 1                                 1 
##            425_74.Bagged & Selfed                    425_74.Control 
##                                 1                                 1 
##      425_74.Unbagged & Outcrossed                     440_66.Bagged 
##                                 1                                 1 
##            440_66.Bagged & Selfed                    440_66.Control 
##                                 1                                 1 
##      440_66.Unbagged & Outcrossed                      46_18.Bagged 
##                                 1                                 1 
##             46_18.Bagged & Selfed                     46_18.Control 
##                                 1                                 1 
##       46_18.Unbagged & Outcrossed                     572_64.Bagged 
##                                 1                                 1 
##            572_64.Bagged & Selfed                    572_64.Control 
##                                 1                                 1 
##      572_64.Unbagged & Outcrossed                     624_64.Bagged 
##                                 1                                 1 
##            624_64.Bagged & Selfed                    624_64.Control 
##                                 1                                 1 
##      624_64.Unbagged & Outcrossed                     667_66.Bagged 
##                                 1                                 1 
##            667_66.Bagged & Selfed                    667_66.Control 
##                                 1                                 1 
##      667_66.Unbagged & Outcrossed                     673_69.Bagged 
##                                 1                                 6 
##            673_69.Bagged & Selfed                    673_69.Control 
##                                 6                                 6 
##      673_69.Unbagged & Outcrossed                     685_70.Bagged 
##                                 4                                 1 
##                    685_70.Control      685_70.Unbagged & Outcrossed 
##                                 1                                 1 
##                     721_79.Bagged            721_79.Bagged & Selfed 
##                                 1                                 1 
##                    721_79.Control      721_79.Unbagged & Outcrossed 
##                                 1                                 1 
##                UNLABELED_1.Bagged       UNLABELED_1.Bagged & Selfed 
##                                 1                                 1 
##               UNLABELED_1.Control UNLABELED_1.Unbagged & Outcrossed 
##                                 1                                 1 
## Levels: 1 4 6
m1 <- glmer(avgFreq ~ trt + (1|accessNum), family = poisson, data = avgDF)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: avgFreq ~ trt + (1 | accessNum)
##    Data: avgDF
## 
##      AIC      BIC   logLik deviance df.resid 
##    532.8    543.7   -261.4    522.8       61 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.5826 -1.2609 -0.2704  0.8947  5.2137 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  accessNum (Intercept) 0.3939   0.6276  
## Number of obs: 66, groups:  accessNum, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.06227    0.17464  11.808  < 2e-16 ***
## trtBagged                -0.93468    0.14975  -6.242 4.33e-10 ***
## trtBagged & Selfed        0.02795    0.11430   0.245    0.807    
## trtUnbagged & Outcrossed  0.72430    0.09851   7.353 1.95e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.256              
## trtBggd&Slf -0.334  0.393       
## trtUnbggd&O -0.389  0.455  0.597
avgPreds <- predict(m1)

length(avgPreds)
## [1] 66

Create a model

m1 <- glmer(Freq ~ trt + I(plantLineage == "673_69") +  (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + I(plantLineage == "673_69") + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    705.7    720.3   -346.8    693.7       78 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.827 -1.299 -0.306  1.134  5.617 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  accessNum (Intercept) 0.3218   0.5673  
## Number of obs: 84, groups:  accessNum, 22
## 
## Fixed effects:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      1.96660    0.16118  12.201  < 2e-16 ***
## trtBagged                       -0.97014    0.12276  -7.903 2.73e-15 ***
## trtBagged & Selfed               0.18833    0.08836   2.131  0.03306 *  
## trtUnbagged & Outcrossed         0.75376    0.08360   9.016  < 2e-16 ***
## I(plantLineage == "673_69")TRUE  0.81679    0.28097   2.907  0.00365 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S trtU&O
## trtBagged   -0.219                     
## trtBggd&Slf -0.301  0.396              
## trtUnbggd&O -0.348  0.420  0.583       
## I(L=="673_6 -0.501  0.002  0.000  0.032
factorPreds <- predict(m1, newdata = avgDF)
length(factorPreds)
## [1] 66
plot(avgPreds, factorPreds)
abline(0,1)

m1 <- glmer(Freq ~ trt + (1|plantLineage) +  (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.4    724.0   -348.7    697.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8555 -1.3244 -0.3544  1.1650  5.5802 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.2085   0.4566  
##  plantLineage (Intercept) 0.2278   0.4773  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04258    0.17623  11.590  < 2e-16 ***
## trtBagged                -0.97065    0.12274  -7.908 2.62e-15 ***
## trtBagged & Selfed        0.18796    0.08836   2.127   0.0334 *  
## trtUnbagged & Outcrossed  0.74998    0.08352   8.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.200              
## trtBggd&Slf -0.275  0.396       
## trtUnbggd&O -0.312  0.421  0.583
m1_1 <- glmer(Freq ~ trt + (1|plantLineage), family = poisson, data = cf1)
anova(m1, m1_1, test = "LRT") # keep accessNum
## Data: cf1
## Models:
## m1_1: Freq ~ trt + (1 | plantLineage)
## m1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m1_1  5 768.19 780.35 -379.10   758.19                             
## m1    6 709.42 724.01 -348.71   697.42 60.773      1  6.404e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.4    724.0   -348.7    697.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8555 -1.3244 -0.3544  1.1650  5.5802 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.2085   0.4566  
##  plantLineage (Intercept) 0.2278   0.4773  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04258    0.17623  11.590  < 2e-16 ***
## trtBagged                -0.97065    0.12274  -7.908 2.62e-15 ***
## trtBagged & Selfed        0.18796    0.08836   2.127   0.0334 *  
## trtUnbagged & Outcrossed  0.74998    0.08352   8.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.200              
## trtBggd&Slf -0.275  0.396       
## trtUnbggd&O -0.312  0.421  0.583
merPreds <- predict(m1, newdata = avgDF)
plot(factorPreds, merPreds)
abline(0,1)

m1 <- glmer(Freq ~ trt + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    710.9    723.0   -350.4    700.9       79 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8677 -1.3540 -0.3762  1.1541  5.5431 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  accessNum (Intercept) 0.4585   0.6771  
## Number of obs: 84, groups:  accessNum, 22
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.18977    0.16026  13.664  < 2e-16 ***
## trtBagged                -0.97104    0.12274  -7.911 2.55e-15 ***
## trtBagged & Selfed        0.18792    0.08836   2.127   0.0335 *  
## trtUnbagged & Outcrossed  0.74877    0.08357   8.960  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.219              
## trtBggd&Slf -0.302  0.396       
## trtUnbggd&O -0.335  0.421  0.583
nonIndPreds <- predict(m1, newdata = avgDF)
plot(nonIndPreds, merPreds)
abline(0,1)

# calculate LRT for trt
m2 <- update(m1, .~. - trt)
anova(m1, m2) # highly significant
## Data: cf1
## Models:
## m2: Freq ~ (1 | accessNum)
## m1: Freq ~ trt + (1 | accessNum)
##    Df     AIC     BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m2  2 1000.39 1005.25 -498.19   996.39                             
## m1  5  710.86  723.01 -350.43   700.86 295.53      3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model diagnostics

m1 <- glmer(Freq ~ trt + (1|plantLineage) +  (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    709.4    724.0   -348.7    697.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8555 -1.3244 -0.3544  1.1650  5.5802 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.2085   0.4566  
##  plantLineage (Intercept) 0.2278   0.4773  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04258    0.17623  11.590  < 2e-16 ***
## trtBagged                -0.97065    0.12274  -7.908 2.62e-15 ***
## trtBagged & Selfed        0.18796    0.08836   2.127   0.0334 *  
## trtUnbagged & Outcrossed  0.74998    0.08352   8.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.200              
## trtBggd&Slf -0.275  0.396       
## trtUnbggd&O -0.312  0.421  0.583
# diagnostics
# qq plot
qqnorm(resid(m1), main = "")
qqline(resid(m1)) # outliers

# residual plot
plot(fitted(m1), resid(m1), xlab = "fitted", ylab = "residuals")
abline(0,0)

# possibly outliers


# QQPlot for group-level effects
qqnorm(ranef(m1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$accessNum[[1]]) 

# # QQPlot for group-level effects
qqnorm(ranef(m1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$plantLineage[[1]])

infl <- influence(m1, obs = TRUE) # takes a while to calculate
# x11()
plot(infl, which = 'cook') # some influential points

# visualize model: 
sjp.lmer(m1, type = 'fe')

sjp.lmer(m1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...

#check assumptions of model 
overdisp_fun <- function(model) {
  ## number of variance parameters in 
  ##   an n-by-n variance-covariance matrix
  vpars <- function(m) {
    nrow(m)*(nrow(m)+1)/2
  }
  model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
  rdf <- nrow(model.frame(model))-model.df
  rp <- residuals(model,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}

overdisp_fun(m1) # shows overdispersion
##        chisq        ratio          rdf            p 
## 2.727300e+02 3.496539e+00 7.800000e+01 2.078860e-23
# here's another way to check for overdispersion
residDev <- sum(residuals(m1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1) 
## [1] 4.072153

Use negative binomial model, since the poisson model is overdispersed

m1.1 <- glmer.nb(Freq ~ trt +(1|plantLineage) +  (1|accessNum), data = cf1)
summary(m1.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(2.1736)  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    571.9    588.9   -279.0    557.9       77 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3912 -0.5687 -0.1032  0.4856  2.6633 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.07951  0.2820  
##  plantLineage (Intercept) 0.33723  0.5807  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.06409    0.23661   8.723  < 2e-16 ***
## trtBagged                -1.06039    0.25396  -4.175 2.98e-05 ***
## trtBagged & Selfed        0.04771    0.24127   0.198 0.843251    
## trtUnbagged & Outcrossed  0.78092    0.23708   3.294 0.000988 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.465              
## trtBggd&Slf -0.487  0.479       
## trtUnbggd&O -0.533  0.457  0.469
m1.2 <- update(m1.1, .~. - (1|accessNum))
anova(m1.1, m1.2, test = "LRT")
## Data: cf1
## Models:
## m1.2: Freq ~ trt + (1 | plantLineage)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m1.2  6 570.42 585.00 -279.21   558.42                         
## m1.1  7 571.92 588.93 -278.96   557.92 0.5036      1     0.4779
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(2.1736)  ( log )
## Formula: Freq ~ trt + (1 | plantLineage)
##    Data: cf1
## 
##      AIC      BIC   logLik deviance df.resid 
##    570.4    585.0   -279.2    558.4       78 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3923 -0.5796 -0.1868  0.4738  2.6743 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  plantLineage (Intercept) 0.4044   0.636   
## Number of obs: 84, groups:  plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.04901    0.23387   8.762  < 2e-16 ***
## trtBagged                -1.04929    0.25272  -4.152 3.29e-05 ***
## trtBagged & Selfed        0.07543    0.23753   0.318 0.750825    
## trtUnbagged & Outcrossed  0.78304    0.23636   3.313 0.000923 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.465              
## trtBggd&Slf -0.480  0.476       
## trtUnbggd&O -0.538  0.461  0.475
# m1.1 <- glmer.nb(Freq ~ trt + I(plantLineage == "673_69") +  (1|accessNum), data = cf1)
# summary(m1.1)

# the model below will not converge
# m1.1a <- glmer.nb(Freq ~ trt + I(plantLineage == "673_69") + trt: I(plantLineage == "673_69") + (1|accessNum), data = cf1, control=glmerControl(optimizer="bobyqa"))
# summary(m1.1a)

# m1.1 <- glmer.nb(avgFreq ~ trt +(1|accessNum), data = avgDF)
# summary(m1.1)

# get overall p-value for treatment
m2.1 <- update(m1.1, .~. - trt)
anova(m1.1, m2.1, test = 'LRT')
## Data: cf1
## Models:
## m2.1: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##      Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## m2.1  4 618.65 628.37 -305.32   610.65                            
## m1.1  7 571.92 588.93 -278.96   557.92 52.73      3  2.093e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Negative binomial model diagnostics

overdisp_fun(m1.2) 
##      chisq      ratio        rdf          p 
## 64.4071127  0.8152799 79.0000000  0.8824556
# here's another way to check for overdispersion
residDev <- sum(residuals(m1.2, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1.2) 
## [1] 1.196974
# diagnostics
# qq plot
qqnorm(resid(m1.2), main = "")
qqline(resid(m1.2)) # a little better

# residual plot
plot(fitted(m1.2), resid(m1.2), xlab = "fitted", ylab = "residuals")
abline(0,0) # residuals look better for this model

# QQPlot for group-level effects
# qqnorm(ranef(m1.2)$accessNum[[1]], main="Normal Q-Q plot for random effects")
# qqline(ranef(m1.2)$accessNum[[1]]) 

qqnorm(ranef(m1.2)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.2)$plantLineage[[1]]) 

infl <- influence(m1.1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model: 
sjp.lmer(m1.2, type = 'fe')

sjp.lmer(m1.2, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...

# get estimated dispersion parameter for NB Model
getME(m1.1, "glmer.nb.theta") # 2.17
## [1] 2.17355
# post-hoc pairwise comparisons with adjusted p-values
# from documentation: 
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(m1.2, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: glmer(formula = Freq ~ trt + (1 | plantLineage), data = cf1, 
##     family = negative.binomial(theta = 2.17355047077783))
## 
## Linear Hypotheses:
##                                              Estimate Std. Error z value
## Control - Bagged == 0                         1.04929    0.25272   4.152
## Control - Bagged & Selfed == 0               -0.07543    0.23753  -0.318
## Control - Unbagged & Outcrossed == 0         -0.78304    0.23636  -3.313
## Bagged - Bagged & Selfed == 0                -1.12472    0.25133  -4.475
## Bagged - Unbagged & Outcrossed == 0          -1.83233    0.25419  -7.208
## Bagged & Selfed - Unbagged & Outcrossed == 0 -0.70761    0.24269  -2.916
##                                              Pr(>|z|)    
## Control - Bagged == 0                         < 0.001 ***
## Control - Bagged & Selfed == 0                0.98891    
## Control - Unbagged & Outcrossed == 0          0.00529 ** 
## Bagged - Bagged & Selfed == 0                 < 0.001 ***
## Bagged - Unbagged & Outcrossed == 0           < 0.001 ***
## Bagged & Selfed - Unbagged & Outcrossed == 0  0.01840 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Visualize annotated plot for fruit counts

count_pub <- within(countFin, {
     trt <- mapvalues(trt, from = c("Control","Control_2", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"), 
                      to = c("Control","Control_2", "Bagged", "Bagged\n & Selfed", 
                                                "Outcrossed"))
})

count_pub <- droplevels(count_pub)


ggplot(count_pub[!(count_pub$trt %in% 'Control_2'), ], aes(x = trt , y = Freq+ 0.1)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 10, label=c("a", "b", "a", "c"),
               color="black") + 
     scale_y_log10(breaks = c(1, 10, 100), limits = c(0.1, 100))

ggsave(paste0(saveDir, "KalmiaFruitNumber_logScale.pdf"), width = 3, height = 4)

Visualize CI’s for each of the groups

# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
cf1.1 <- within(cf1, {
     trt = as.factor(as.character(cf1$trt))
})

# refit model with different reference levels

m1.2 <- glmer.nb(Freq ~ trt + (1 | plantLineage) + (1 | accessNum), data = cf1.1, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)))
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(2.1735)  ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: cf1.1
## 
##      AIC      BIC   logLik deviance df.resid 
##    571.9    588.9   -279.0    557.9       77 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3912 -0.5687 -0.1032  0.4856  2.6633 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.07951  0.2820  
##  plantLineage (Intercept) 0.33722  0.5807  
## Number of obs: 84, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0037     0.2542   3.949 7.84e-05 ***
## trtBagged & Selfed         1.1081     0.2531   4.379 1.19e-05 ***
## trtControl                 1.0604     0.2540   4.175 2.97e-05 ***
## trtUnbagged & Outcrossed   1.8413     0.2563   7.183 6.81e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtB&S trtCnt
## trtBggd&Slf -0.544              
## trtControl  -0.566  0.547       
## trtUnbggd&O -0.598  0.532  0.568
pframe <- data.frame(trt = levels(droplevels(cf1.1$trt)))
pframe$Freq <- 0
pp <- predict(m1.2, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(m1.2), pframe)
predFun<-function(.) exp(mm%*%fixef(.) )
bb<-bootMer(m1.2,FUN=predFun,nsim=200) #do this 200 times
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00474257 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
#as we did this 200 times the 95% CI will be bordered by the 5th and 195th value
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp

pframe$median = tapply(cf1.1$Freq, INDEX = cf1.1$trt, median)



pframe$trt <- mapvalues(pframe$trt, from = c("Control", "Bagged", "Bagged & Selfed", 
                                                "Unbagged & Outcrossed"),
                        to = c("Control", "Bagged", "Bagged\n & Selfed", 
                                                "Outcrossed"))
pframe$trt <- relevel(pframe$trt, ref = "Control")

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
number_ticks <- function(n) {function(limits) pretty(limits, n)}
g0 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Number of fruits", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     scale_y_log10(limits =c(1,60), breaks = number_ticks(6)) + 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 10, label=c("a", "b", "a", "c"),
               color="black") 
g0

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_logScale.pdf"), width = 3, height = 4)


## Bootstrap CI on linear scale -- not that great!
g1 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Number of Fruits", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+ 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 40, label=c("a", "b", "a", "c"),
               color="black") 
g1

ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_LinearScale.pdf"), width = 3, height = 4)


cp <- droplevels(count_pub[count_pub$trt != 'Control_2',])

# this might actually be the best way
ggplot(cp, aes(x = trt , y = Freq)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Number of fruits", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 1, label=c("a", "b", "a", "c"),
               color="black") 

ggsave(paste0(saveDir, "KalmiaFruitNumber_LinearScale.pdf"), width = 3, height = 4)

Visualize fruit size

# calculate fruit size by plant
sizeDF_mean <- as.data.frame(tapply(kfrt$dia_mm, INDEX = kfrt$plantNum, mean))
colnames(sizeDF_mean) = "meanFrtSz"
sizeDF_mean$trt = sapply(X = 1:length(sizeDF_mean$meanFrtSz), 
                          FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]), 
                                                     split = "__")[[1]][2])

sizeDF_mean$trt <- mapvalues(sizeDF_mean$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"))

# reorder
sizeDF_mean$trt <- factor(sizeDF_mean$trt, levels = c("Control", "Control_2", "Bagged", "Bagged & Selfed",  
                                                "Unbagged & Outcrossed"))

sizeDF_mean$accessNum = sapply(X = 1:length(sizeDF_mean$meanFrtSz), 
                   FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]), 
                                              split = "__")[[1]][1])


ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot(alpha = 0.5) + 
#      stat_summary(fun.y=mean, geom="line", aes(group = accessNum, color = accessNum))  + 
#      stat_summary(fun.y=mean, geom="point", aes(group = accessNum, color = accessNum)) + 
     geom_point(aes(color = trt))

ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot() + 
     labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1")

ggsave(paste0(saveDir, "KalmiaFruitDiameter.pdf"), width = 10, height = 8)


# visualize , but exclude treatment 5
ggplot(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], 
       aes(x = trt, y = meanFrtSz, fill = trt)) + 
     geom_boxplot() + 
     labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") + 
     scale_fill_brewer(name = "Treatment", palette = "Set1") + 
     theme(axis.text.x = element_text(angle = 35, hjust = 1), 
          legend.position = 'none') 

ggsave(paste0(saveDir, "KalmiaFruitDiameter_trt14Only.pdf"), width = 5, height = 4)

Model Fruit Size with LMER

size1 <- within(kfrt, {
     trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Unbagged", 
                                                "Unbagged & Outcrossed", "Unbagged_2"), 
                      c("Bagged", "Bagged\n & Selfed", "Control", 
                                                "Outcrossed", "Control_2"))
})

size1 <- droplevels(size1[size1$trt != "Control_2",])

# get sample sizes for size dataset
nrow(size1) # total number of fruits in analysis for size
## [1] 1035
data.frame(table(size1$accessNum)) # total number of fruits per plant
##           Var1 Freq
## 1    1129_74_A   93
## 2    1129_74_B   36
## 3    1129_74_C   69
## 4    1129_74_E   43
## 5    1129_74_F   48
## 6    12_2006_A   22
## 7  128_96_MASS   64
## 8  132_98_MASS   17
## 9     150_58_A   39
## 10    232_46_A   13
## 11     35_40_C   70
## 12     39_60_A   37
## 13    425_74_D   94
## 14    440_66_A   53
## 15     46_18_A   19
## 16 572_64_MASS   31
## 17    624_64_B   43
## 18 667_66_MASS   69
## 19    673_69_B  124
## 20    685_70_A   23
## 21    721_79_B   10
## 22 UNLABELED_1   18
data.frame(table(size1$plantLineage)) # total number of fruits per plant lineage
##           Var1 Freq
## 1      12_2006   22
## 2       128_96   64
## 3       132_98   17
## 4       150_58   39
## 5       232_46   13
## 6        35_40   70
## 7        39_60   37
## 8       425_74   94
## 9       440_66   53
## 10       46_18   19
## 11      572_64   31
## 12      624_64   43
## 13      667_66   69
## 14      673_69  413
## 15      685_70   23
## 16      721_79   10
## 17 UNLABELED_1   18
size1$trt <- relevel(as.factor(size1$trt), ref = "Control")
f1 <- lmer(dia_mm ~ trt + (1|plantLineage/accessNum), data = size1)
summary(f1) # final model for paper
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage/accessNum)
##    Data: size1
## 
## REML criterion at convergence: 1852.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8939 -0.6307  0.0377  0.6695  3.4682 
## 
## Random effects:
##  Groups                 Name        Variance Std.Dev.
##  accessNum:plantLineage (Intercept) 0.04439  0.2107  
##  plantLineage           (Intercept) 0.13357  0.3655  
##  Residual                           0.32638  0.5713  
## Number of obs: 1035, groups:  accessNum:plantLineage, 22; plantLineage, 17
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           4.23422    0.11027   38.40
## trtBagged            -0.51953    0.07274   -7.14
## trtBagged\n & Selfed  0.01439    0.05304    0.27
## trtOutcrossed         0.73741    0.04881   15.11
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.179              
## trtBggd&Slf -0.235  0.387       
## trtOutcrssd -0.296  0.404  0.537
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = size1)
summary(f1) # final model for paper (same as above)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Data: size1
## 
## REML criterion at convergence: 1852.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8939 -0.6307  0.0377  0.6695  3.4682 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  accessNum    (Intercept) 0.04439  0.2107  
##  plantLineage (Intercept) 0.13357  0.3655  
##  Residual                 0.32638  0.5713  
## Number of obs: 1035, groups:  accessNum, 22; plantLineage, 17
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           4.23422    0.11027   38.40
## trtBagged            -0.51953    0.07274   -7.14
## trtBagged\n & Selfed  0.01439    0.05304    0.27
## trtOutcrossed         0.73741    0.04881   15.11
## 
## Correlation of Fixed Effects:
##             (Intr) trtBgg trtB&S
## trtBagged   -0.179              
## trtBggd&Slf -0.235  0.387       
## trtOutcrssd -0.296  0.404  0.537
# get p-value for trt
f2 <- update(f1, .~. - trt)
anova(f1, f2, "LRT")
## refitting model(s) with ML (instead of REML)
## Data: size1
## Models:
## f2: dia_mm ~ (1 | plantLineage) + (1 | accessNum)
## f1: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
##    Df    AIC    BIC   logLik deviance Chisq Chi Df Pr(>Chisq)    
## f2  4 2234.7 2254.5 -1113.36   2226.7                            
## f1  7 1851.4 1886.0  -918.71   1837.4 389.3      3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fruit Size Diagnostics

# diagnostics
# qq plot
qqnorm(resid(f1), main = "")
qqline(resid(f1)) # good

# residual plot
plot(fitted(f1), residuals(f1, type = "deviance"), xlab = "fitted", ylab = "residuals")
abline(0,0) 

# QQPlot for group-level effects
qqnorm(ranef(f1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$accessNum[[1]]) # one possible outlier

# QQPlot for group-level effects
qqnorm(ranef(f1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$plantLineage[[1]]) # looks good

infl <- influence(f1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1

# visualize model: 
sjp.lmer(f1, type = 'fe')
## Computing p-values via Kenward-Roger approximation. Use `p.kr = FALSE` if computation takes too long.

# Best Linear Unbiased Predictors (BLUPs)
sjp.lmer(f1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...

# post-hoc pairwise comparisons with adjusted p-values
# from documentation: 
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(f1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## Loading required namespace: lmerTest
## Note: df set to 1020
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum), 
##     data = size1)
## 
## Linear Hypotheses:
##                                     Estimate Std. Error t value Pr(>|t|)
## Control - Bagged == 0                0.51953    0.07274   7.142   <1e-06
## Control - Bagged\n & Selfed == 0    -0.01439    0.05304  -0.271    0.993
## Control - Outcrossed == 0           -0.73741    0.04881 -15.107   <1e-06
## Bagged - Bagged\n & Selfed == 0     -0.53392    0.07156  -7.461   <1e-06
## Bagged - Outcrossed == 0            -1.25694    0.06930 -18.138   <1e-06
## Bagged\n & Selfed - Outcrossed == 0 -0.72301    0.04916 -14.709   <1e-06
##                                        
## Control - Bagged == 0               ***
## Control - Bagged\n & Selfed == 0       
## Control - Outcrossed == 0           ***
## Bagged - Bagged\n & Selfed == 0     ***
## Bagged - Outcrossed == 0            ***
## Bagged\n & Selfed - Outcrossed == 0 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Visualize Fruit Size with annotations

# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
sf1<- within(size1, {
     trt = as.factor(as.character(size1$trt))
})

# refit model with different reference levels
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = sf1)
pframe <- data.frame(trt = levels(droplevels(sf1$trt)))
pframe$dia_mm = 0
pp <- predict(f1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(f1), pframe)
predFun<-function(.) mm%*%fixef(.)
bb<-bootMer(f1,FUN=predFun,nsim=200) #do this 200 times
# get quantiles from bootstrap sample
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp
pframe # print frame, in case reporting in a table
##                 trt dia_mm      blo      bhi predMean
## 1            Bagged      0 3.445394 3.935725 3.714693
## 2 Bagged\n & Selfed      0 4.022982 4.461613 4.248615
## 3           Control      0 3.991122 4.469500 4.234223
## 4        Outcrossed      0 4.715504 5.185830 4.971629
pframe$trt <- relevel(pframe$trt, ref = "Control")

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g2 <- ggplot(pframe, aes(x=trt, y=predMean))+
     geom_point()+ 
     labs(y = "Fruit dia. (mm)", x = "Treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 0.7, label=c("a", "b", "a", "c"),
               color="black") 
g2

ggsave(paste0(saveDir, "KalmiaFruitDia_BSCI.pdf"), width = 3, height = 4)

 ## visualize raw data (mean fruit size per plant)

sdf1 <- droplevels(within(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], {
     trt <- mapvalues(trt, c("Bagged", "Bagged & Selfed", "Control", 
                                                "Unbagged & Outcrossed", "Control_2"), 
                      c("Bagged", "Bagged\n & Selfed", "Control", 
                                                "Outcrossed", "Control_2"))
}))

# visualize fruit size
ggplot(sdf1, aes(x = trt, y = meanFrtSz)) + 
     geom_boxplot(width = 0.2, fill = 'grey80') +
     labs(y = "Fruit dia. (mm)", x = "Treatment") +
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') + 
     # annotate
     annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 2, label=c("a", "b", "a", "c"),
               color="black")

ggsave(paste0(saveDir, "KalmiaFruitDia.pdf"), width = 3, height = 4)


# compare two plots
g2 + geom_boxplot(data = sdf1, aes(x = trt, y = meanFrtSz), width = 0.2, alpha = 0)

Visualize data to compare fruit size with number of seeds

# read in data
sizeSeed <- read.csv("KalmiaFruitSizeAndSeeds.csv")

ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     stat_smooth(method = 'lm', formula = y ~ exp(x), se = F) + 
     labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')

ggsave(paste0(saveDir, "KalmiaFruitSeeds.pdf"), width = 5, height = 4)


# on log scale
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     stat_smooth(method = 'lm', formula = y ~ x, se = F) + 
     scale_y_continuous(trans="log") + 
     labs(y = "log(e) number of seeds")

# GLM 
ss1 <- glm(NumSeeds ~ Dia..mm., family = poisson, data = sizeSeed)
summary(ss1)
## 
## Call:
## glm(formula = NumSeeds ~ Dia..mm., family = poisson, data = sizeSeed)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.1484  -1.3123   0.0538   1.1869   3.4399  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.22136    0.32470   0.682    0.495    
## Dia..mm.     0.63356    0.07019   9.027   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 156.083  on 18  degrees of freedom
## Residual deviance:  68.423  on 17  degrees of freedom
## AIC: 160.53
## 
## Number of Fisher Scoring iterations: 4
ss1$deviance / ss1$df.residual # overdispersed
## [1] 4.024875
# Neg Binomial Model 
ss2 <- glm.nb(NumSeeds ~ Dia..mm., data = sizeSeed)
summary(ss2) # final model for paper
## 
## Call:
## glm.nb(formula = NumSeeds ~ Dia..mm., data = sizeSeed, init.theta = 7.72630546, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.03651  -0.83952   0.02891   0.59012   1.60618  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.2739     0.5660   0.484    0.628    
## Dia..mm.      0.6215     0.1292   4.811  1.5e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(7.7263) family taken to be 1)
## 
##     Null deviance: 43.384  on 18  degrees of freedom
## Residual deviance: 18.964  on 17  degrees of freedom
## AIC: 135.72
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  7.73 
##           Std. Err.:  3.54 
## 
##  2 x log-likelihood:  -129.72
# get overall test stat for dia
ss3 <-update(ss2, .~. - Dia..mm.)
anova(ss2, ss3) # overall p-value for Dia..mm.
## Likelihood ratio tests of Negative Binomial Models
## 
## Response: NumSeeds
##      Model    theta Resid. df    2 x log-lik.   Test    df LR stat.
## 1        1 2.828473        18       -145.5067                      
## 2 Dia..mm. 7.726305        17       -129.7204 1 vs 2     1 15.78629
##        Pr(Chi)
## 1             
## 2 7.091441e-05
## visualize CI's for the two different models

newD <- data.frame(Dia..mm. = seq(2, 6, length.out = 100))
nbPreds = predict(ss2, newD, type = 'response', se.fit = TRUE)
nd1 <- data.frame(newD, nppred = nbPreds$fit, npse = nbPreds$se.fit)
predsPois <- predict(ss1, newD, type = 'response', se.fit = TRUE)


n2 <- data.frame(nd1, poisPred = predsPois$fit, poisse = predsPois$se.fit)
n2$npUpper <- with(n2,  nppred + 2 * npse)
n2$npLower <- with(n2, nppred - 2 * npse)
n2$pUpper <- with(n2,  poisPred + 2 * poisse)
n2$pLower <- with(n2, poisPred - 2 * poisse)

# compare NB errors vs. Pois errors
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) + 
     geom_point() + 
     geom_line(data = n2, aes(x = Dia..mm., y = poisPred), color = 'blue') + 
     geom_line(data = n2, aes(x = Dia..mm., y = pUpper), color = 'blue', linetype = 2) + 
     geom_line(data = n2, aes(x = Dia..mm., y = pLower), color = 'blue', linetype = 2) + 
     labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel') + 
     geom_line(data = n2, aes(x = Dia..mm., y = nppred), color = 'red') + 
     geom_line(data = n2, aes(x = Dia..mm., y = npUpper), color = 'red', linetype = 3) + 
     geom_line(data = n2, aes(x = Dia..mm., y = npLower), color = 'red', linetype = 3)

## model diagnostics for negative binomial model
plot(ss2, which = 4) #cook's distance

plot(y = residuals(ss2, type = 'deviance'), x = ss2$fitted.values)
abline(h = 0)

# should be close to 1
ss2$deviance / ss2$df.residual
## [1] 1.115512
# get sample size
nrow(sizeSeed) # total number of individual fruits
## [1] 19
sum(sizeSeed$NumSeeds) # total number of seeds counted
## [1] 382